In [31]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
In [ ]:
df = pd.read_csv('C:/Users/AmlaHardk/Desktop/Methane_final.csv')

Data Preprocessing¶

  1. understand the dataset
  2. Remove unnecessary columns from data
  3. Add filters for selecting particular data for visualisation
In [3]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1548 entries, 0 to 1547
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  1548 non-null   int64  
 1   region      1548 non-null   object 
 2   country     1548 non-null   object 
 3   emissions   1548 non-null   float64
 4   type        1548 non-null   object 
 5   segment     1548 non-null   object 
 6   reason      1548 non-null   object 
 7   baseYear    1548 non-null   object 
 8   notes       1548 non-null   object 
dtypes: float64(1), int64(1), object(7)
memory usage: 109.0+ KB
In [4]:
df = df.drop(columns = ['Unnamed: 0','notes'], inplace = True)
In [8]:
df.head()
Out[8]:
Unnamed: 0 region country emissions type segment reason baseYear notes
0 0 Africa Algeria 257.611206 Agriculture Total All 2019-2021 Average based on United Nations Framework Conv...
1 1 Africa Algeria 0.052000 Energy Bioenergy All 2022 Estimates from end-uses are for 2020 or 2021 (...
2 2 Africa Algeria 130.798996 Energy Gas pipelines and LNG facilities Fugitive 2022 Not available
3 3 Africa Algeria 69.741898 Energy Gas pipelines and LNG facilities Vented 2022 Not available
4 4 Africa Algeria 213.987000 Energy Onshore gas Fugitive 2022 Not available
In [9]:
df.describe()
Out[9]:
Unnamed: 0 emissions
count 1548.000000 1548.000000
mean 773.500000 643.255972
std 447.013423 5566.238201
min 0.000000 0.000459
25% 386.750000 2.659361
50% 773.500000 24.064669
75% 1160.250000 128.419594
max 1547.000000 141953.765625
In [12]:
df.isnull().sum()
Out[12]:
Unnamed: 0    0
region        0
country       0
emissions     0
type          0
segment       0
reason        0
baseYear      0
notes         0
dtype: int64
In [14]:
temp = df
temp=temp[(temp['region']!='World')&(temp['segment']!='Total')]
In [24]:
sns.barplot(x='region',y='emissions',data=region)
plt.xticks(rotation=90)
plt.title('total emissions in every region')
plt.show()
In [26]:
seg=df.groupby('segment')[['emissions']].sum()[:11].reset_index()

sns.barplot(x='segment',y='emissions',data=seg)
plt.title('emissions by their segment')
plt.xticks(rotation=90)
plt.show()
In [27]:
cntry=df.groupby('country')[['emissions']].sum().reset_index()
cntry=cntry.sort_values(by='emissions',ascending=False)[1:11]
cntry
Out[27]:
country emissions
16 China 81048.371586
98 United States 48604.877296
77 Russia 42432.929804
38 India 34852.007386
10 Brazil 21720.838126
39 Indonesia 19404.469000
29 European Union 18985.173461
65 Other 15997.357175
40 Iran 13030.685366
62 Nigeria 9903.895667
In [30]:
plt.figure(figsize=(15,6))
sns.barplot(x='country',y='emissions',data=cntry)
plt.title('countries with highest emission')
plt.show()
In [32]:
df.head()
Out[32]:
Unnamed: 0 region country emissions type segment reason baseYear notes
0 0 Africa Algeria 257.611206 Agriculture Total All 2019-2021 Average based on United Nations Framework Conv...
1 1 Africa Algeria 0.052000 Energy Bioenergy All 2022 Estimates from end-uses are for 2020 or 2021 (...
2 2 Africa Algeria 130.798996 Energy Gas pipelines and LNG facilities Fugitive 2022 Not available
3 3 Africa Algeria 69.741898 Energy Gas pipelines and LNG facilities Vented 2022 Not available
4 4 Africa Algeria 213.987000 Energy Onshore gas Fugitive 2022 Not available
In [37]:
data = df
year_emissions = data[data['country'] != 'World'].groupby('baseYear')['emissions'].sum()
In [42]:
px.sunburst(temp,values='emissions',path=['region','segment'],color='region',width=700,height=700)
In [47]:
px.pie(temp2,values='emissions',names='type',hole=0.5)
In [48]:
data = df
In [49]:
year_emissions = data[data['country'] != 'World'].groupby('baseYear')['emissions'].sum()

# Sort year emissions in descending order, then get top 10
top10_year_emissions = year_emissions.sort_values(ascending=False).head(10)

# Plot bar chart for top 10 year emissions
plt.figure(figsize=(12, 6))
plt.bar(top10_year_emissions.index, top10_year_emissions.values, color=plt.cm.Paired(range(len(top10_year_emissions))))
plt.xlabel('Year', fontsize=12)
plt.ylabel('Emissions', fontsize=12)
plt.title('Top 10 Years by Emissions (excluding World)', fontsize=16)
plt.xticks(rotation=45)

# Show the plot
plt.show()
In [ ]: